1 PREFACE

1.1 PROJECT

Burden analysis of the extended gene region (including regulatory elements) in sporadic breast cancer across diverse ancestries

1.2 OBJECTIVE

Combine the raw results of the individual ancestries and substudies in a meta-analysis

1.4 Version Info

R version 3.6.3 (2020-02-29)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.5 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1

locale:
 [1] LC_CTYPE=en_GB.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_GB.UTF-8        LC_COLLATE=en_GB.UTF-8    
 [5] LC_MONETARY=en_GB.UTF-8    LC_MESSAGES=en_GB.UTF-8   
 [7] LC_PAPER=en_GB.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_GB.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] metap_1.3         data.table_1.12.8 forcats_0.5.0     stringr_1.4.0    
 [5] dplyr_0.8.5       purrr_0.3.4       readr_1.3.1       tidyr_1.0.3      
 [9] tibble_3.0.1      ggplot2_3.3.0     tidyverse_1.3.0   rmdformats_0.3.7 
[13] knitr_1.28       

loaded via a namespace (and not attached):
 [1] Biobase_2.46.0      httr_1.4.1          jsonlite_1.7.1     
 [4] splines_3.6.3       modelr_0.1.7        sn_1.6-2           
 [7] Rdpack_0.11-1       assertthat_0.2.1    stats4_3.6.3       
[10] TFisher_0.2.0       cellranger_1.1.0    yaml_2.2.1         
[13] numDeriv_2016.8-1.1 pillar_1.4.4        backports_1.1.7    
[16] lattice_0.20-41     glue_1.4.2          digest_0.6.26      
[19] rvest_0.3.5         colorspace_1.4-1    sandwich_2.5-1     
[22] htmltools_0.5.0     Matrix_1.2-18       pkgconfig_2.0.3    
[25] bibtex_0.4.2.2      broom_0.5.6         haven_2.3.1        
[28] bookdown_0.19       mvtnorm_1.1-1       scales_1.1.1       
[31] generics_0.0.2      ellipsis_0.3.1      TH.data_1.0-10     
[34] withr_2.3.0         BiocGenerics_0.32.0 cli_2.0.2          
[37] mnormt_1.5-7        survival_3.2-7      magrittr_1.5       
[40] crayon_1.3.4        readxl_1.3.1        evaluate_0.14      
[43] fs_1.4.1            fansi_0.4.1         MASS_7.3-53        
[46] nlme_3.1-149        xml2_1.3.2          tools_3.6.3        
[49] hms_0.5.3           gbRd_0.4-11         formatR_1.7        
[52] lifecycle_0.2.0     multcomp_1.4-13     mutoss_0.1-12      
[55] munsell_0.5.0       reprex_0.3.0        plotrix_3.7-8      
[58] compiler_3.6.3      rlang_0.4.8         grid_3.6.3         
[61] rstudioapi_0.11     rmarkdown_2.1       multtest_2.42.0    
[64] codetools_0.2-16    gtable_0.3.0        DBI_1.1.0          
[67] R6_2.4.1            zoo_1.8-8           lubridate_1.7.8    
[70] stringi_1.4.6       parallel_3.6.3      Rcpp_1.0.5         
[73] vctrs_0.3.0         dbplyr_1.4.3        tidyselect_1.1.0   
[76] xfun_0.13          

1.7 define some functions

2 Load inputs

2.2 Load data for ancestries

3 Quality Control

3.7 QQ Plots and Genomic Inflation

3.7.4 Lambda

4 Combine association data

> assocs = local({
+     # Build one wide association table: the key column (column 1 of `asian`)
+     # followed by four statistics per cohort, in the order
+     # <cohort>_Num_Variants, <cohort>_PValue, <cohort>_FDR, <cohort>_PValueGC.
+     
+     # Source column names in every cohort table joined after `asian`, and the
+     # suffixes they receive in the combined table.
+     sourceCols = c("VariantNumber", "PValue", "P.fdr", "PValueGC")
+     targetSuffixes = c("Num_Variants", "PValue", "FDR", "PValueGC")
+     
+     # Cohorts in join order; the list names are the output column prefixes.
+     cohorts = list(African = african, Hispanic = hispanic, Eur01A = eur01A, 
+         Eur01B = eur01B, Eur01C = eur01C, Eur01D = eur01D, Eur02A = eur02A, 
+         Eur02B = eur02B, Eur03A = eur03A, Eur03B = eur03B, Eur04 = eur04, 
+         Eur05A = eur05A, Eur05B = eur05B, Eur05C = eur05C)
+     
+     # Join one cohort onto the accumulated table (natural join on the shared
+     # column names), keep the already accumulated columns by position and
+     # append the cohort's four statistics under prefixed names.
+     appendCohort = function(acc, prefix) {
+         newNames = paste(prefix, targetSuffixes, sep = "_")
+         # NOTE(review): the African variant-count column is spelled
+         # 'Afrian_Num_Variants' (probably a typo for 'African'); the spelling
+         # is kept because code outside this chunk may reference it.
+         if (prefix == "African") {
+             newNames[1] = "Afrian_Num_Variants"
+         }
+         renamed = setNames(syms(sourceCols), newNames)
+         acc %>% full_join(cohorts[[prefix]]) %>% select(seq_len(ncol(acc)), 
+             !!!renamed)
+     }
+     
+     # The Asian table is selected by column position rather than by name.
+     asianPart = asian %>% select(1, Asian_Num_Variants = 5, Asian_PValue = 6, 
+         Asian_FDR = 7, Asian_PValueGC = 11)
+     
+     Reduce(appendCohort, names(cohorts), asianPart)
+ })

5 Perform Meta Analysis Data

The meta-analysis is based on the raw p-values of all subcohorts, except for the Eur02B (lambda1000: 1.320551) and Hispanic (lambda1000: 1.1369632) cohorts, for which the genomic-control-corrected p-values are used.

5.3 perform meta-analysis

> # P-value columns entering the meta-analysis. They are extracted once, outside
> # the loop, because the loop only writes result columns and never changes them.
> # Hispanic and Eur02B contribute their genomic-control-corrected p-values.
> pAllCohorts = Meta %>% select(Asian_PValue, African_PValue, Hispanic_PValueGC, 
+     Eur01A_PValue, Eur01B_PValue, Eur01C_PValue, Eur01D_PValue, Eur02A_PValue, 
+     Eur02B_PValueGC, Eur03A_PValue, Eur03B_PValue, Eur04_PValue, Eur05A_PValue, 
+     Eur05B_PValue, Eur05C_PValue)
> pEuroCohorts = Meta %>% select(Eur01A_PValue, Eur01B_PValue, Eur01C_PValue, 
+     Eur01D_PValue, Eur02A_PValue, Eur02B_PValueGC, Eur03A_PValue, Eur03B_PValue, 
+     Eur04_PValue, Eur05A_PValue, Eur05B_PValue, Eur05C_PValue)
> 
> # Iterate over the rows of Meta, the table that is read and written below.
> # seq_len() also yields an empty loop for a zero-row table, unlike 1:n.
> for (i in seq_len(nrow(Meta))) {
+     # named vector of the p-values of row i across all cohorts
+     tmpAll = pAllCohorts %>% slice(i) %>% unlist()
+     
+     # named vector of the p-values of row i across the European cohorts
+     tmpEuro = pEuroCohorts %>% slice(i) %>% unlist()
+     
+     # meta analysis using Stouffer (metap::sumz); na.omit drops cohorts with a
+     # missing p-value. Assumes sampleSizeAll / sampleSizeEuro are ordered like
+     # the selected columns - TODO confirm where they are defined.
+     Meta$StoufferMetaP[i] = sumz(p = tmpAll, weights = sampleSizeAll, na.action = na.omit)$p
+     Meta$StoufferMetaEuropeanP[i] = sumz(p = tmpEuro, weights = sampleSizeEuro, na.action = na.omit)$p
+     
+     # meta analysis using METAL method (helper defined elsewhere in this document)
+     Meta$MetalMetaP[i] = METAL_Sample_size_based_meta_PValue(pvalues = tmpAll, weights = sampleSizeAll)
+     Meta$MetalMetaEuropeanP[i] = METAL_Sample_size_based_meta_PValue(pvalues = tmpEuro, 
+         weights = sampleSizeEuro)
+     
+     # number of cohorts without a p-value for this row
+     Meta$Number_missingPvalues[i] = sum(is.na(tmpAll))
+ }
> 
> # Benjamini-Hochberg adjustment of each meta-analysis p-value column.
> # NOTE(review): 'MetalrMetaP_FDR' looks like a typo for 'MetalMetaP_FDR'; the
> # name is kept unchanged because later code may reference it.
> Meta$StoufferMetaP_FDR = p.adjust(Meta$StoufferMetaP, method = "BH")
> Meta$StoufferMetaEuropeanP_FDR = p.adjust(Meta$StoufferMetaEuropeanP, method = "BH")
> Meta$MetalrMetaP_FDR = p.adjust(Meta$MetalMetaP, method = "BH")
> Meta$MetalMetaEuropeanP_FDR = p.adjust(Meta$MetalMetaEuropeanP, method = "BH")